## Broz and Plouffe 2010 IO

library(foreign) 
library(Amelia)
library(mi)
library(hot.deck)
library(mice)
library(mirt)
library(miceadds)

## Load original dataset
bp2010 <- read.dta("BP2010 IO Rep Data.dta.dta")
head(bp2010)
dim(bp2010)

## Drop ID vars
bp2010$c_code <- bp2010$c_name <- NULL

## How many variables? 26: no reduction necessary
dim(bp2010)

## Imputation
case <-c(1:nrow(bp2010))
bp2010 <- cbind(bp2010, case)
head(bp2010)

## What is average percentage of missing data?
NAs <- function(x) {
    as.vector(apply(x, 2, function(x) length(which(is.na(x)))))
    }
NAs(bp2010)
mean(NAs(bp2010)/nrow(bp2010))*100

## Thus: 10 imputations

set.seed(02138)
bp2010.out <- amelia(bp2010, m = 10, cs = "ifscode", empri = 0.01*nrow(bp2010))

write.amelia(obj=bp2010.out, file.stem = "BP2010 IO Imp Data", format = "dta", separate = FALSE)

## m = 5
set.seed(02138)
bp2010.out.5 <- amelia(bp2010, m = 5, cs = "ifscode", empri = 0.01*nrow(bp2010))

write.amelia(obj=bp2010.out.5, file.stem = "BP2010 IO Imp Data 5", format = "dta", separate = FALSE)

## m = % incomplete observations
complete <- complete.cases(m2009)
sum(!complete)/nrow(m2009)*100
## Thus: 94 imputations
set.seed(02138)
bp2010.out.94 <- amelia(bp2010, m = 94, cs = "ifscode", empri = 0.01*nrow(bp2010))

write.amelia(obj=bp2010.out.94, file.stem = "BP2010 IO Imp Data 94", format = "dta", separate = FALSE)

## MICE
bp2010_mice <- mice(bp2010, m = 10)
datlist <- mids2datlist(bp2010_mice)
bp2010_amelia <- list( "imputations"= datlist)
class(bp2010_amelia) <- "amelia"
write.amelia(obj= bp2010_amelia, file.stem = "BP2010 IO Imp Data MICE", format = "dta", separate = FALSE)

## HD
bp2010.out.hd <- hot.deck(bp2010, m = 10)
bp2010.out.hd <- hd2amelia(bp2010.out.hd)
write.amelia(obj= bp2010.out.hd, file.stem = "BP2010 IO Imp Data HD", format = "dta", separate = FALSE)